/*
 Copyright 2013--Present JMM_PROGNAME

 This file is distributed under the terms of the JMM_PROGNAME License.

 You should have received a copy of the JMM_PROGNAME License.
 If not, see <JMM_PROGNAME WEBSITE>.
*/
// CREATED    : 7/5/2015
// LAST UPDATE: 10/1/2015

#include "statsxx/machine_learning/Gaussian_process/GaussianProcess.hpp"

// STL
#include <cmath>     // std::log()
#include <memory>    // std::unique_ptr<>
#include <stdexcept> // std::runtime_error()
#include <tuple>     // std::tie()

// jScience
#include "jScience/linalg.hpp" // Matrix<>, Vector<>, inverse(), dot_product(), *, determinant()
#include "jScience/physics/consts.hpp" // PI

// stats++
#include "statsxx/statistics/CovarianceFunction.hpp" // CovarianceFunction


// Default constructor: members are default-initialized (no covariance
// function is set; assign one before calling add_data()/predict()).
// Defaulted rather than `{}` -- also drops the stray trailing semicolon
// (an empty declaration flagged by -Wextra-semi).
inline Gaussian_process::GaussianProcess::GaussianProcess() = default;

// Construct from a covariance function: the process takes its own copy
// of *k_arg via clone(), so the caller keeps ownership of k_arg.
inline Gaussian_process::GaussianProcess::GaussianProcess(std::unique_ptr<CovarianceFunction> const &k_arg)
    : k(k_arg->clone())
{
}

// Destructor: nothing to release by hand (k is a unique_ptr), so default
// it; this also removes the stray trailing semicolon after the old `{}`.
inline Gaussian_process::GaussianProcess::~GaussianProcess() = default;


//========================================================================
//========================================================================
//
// NAME: void GaussianProcess::add_data(const Matrix<double> &X_arg, const Vector<double> &y_arg, const double sigma_n_arg)
//
// DESC: Stores the training inputs X, targets y, and noise level
//       sigma_n, then precomputes everything prediction and the
//       likelihood need:
//         Ky    = K(X,X) + sigma_n^2 * I
//         Kyinv = Ky^-1
//         alpha = Ky^-1 * y
//       Throws std::runtime_error if inverse() reports failure.
//
//========================================================================
//========================================================================
inline void Gaussian_process::GaussianProcess::add_data(
                                                        const Matrix<double> &X_arg,
                                                        const Vector<double> &y_arg,
                                                        const double sigma_n_arg
                                                        )
{
    // assign training data
    this->X = X_arg;
    this->y = y_arg;
    this->sigma_n = sigma_n_arg;

    // compute Ky = K + sigma_n^2*I
    Ky = Matrix<double>(X_arg.size(0), X_arg.size(0));

    for( auto i = 0; i < X_arg.size(0); ++i )
    {
        for( auto j = 0; j < X_arg.size(0); ++j )
        {
            Ky(i,j) = this->k->cov(X_arg.row(i), X_arg.row(j));

            if( j == i )
            {
                // BUG FIX: the noise VARIANCE sigma_n^2 -- not sigma_n --
                // belongs on the diagonal. dlikelihood() differentiates the
                // diagonal as d(sigma_n^2)/dsigma_n = 2*sigma_n, which is
                // only consistent with a sigma_n^2 noise term (and matches
                // the standard GP model, GPML Eq. (2.20)).
                Ky(i,j) += sigma_n_arg*sigma_n_arg;
            }
        }
    }

    // compute Ky^-1; inverse() returns two status codes plus the result
    int INFO0;
    int INFO;
    std::tie(INFO0, INFO, this->Kyinv) = inverse(Ky);

    if( INFO0 != 0 )
    {
        throw std::runtime_error("Error in GaussianProcess::add_data(): INFO0 != 0 on return from inverse()");
    }

    if( INFO != 0 )
    {
        throw std::runtime_error("Error in GaussianProcess::add_data(): INFO != 0 on return from inverse()");
    }

    // compute Ky^-1*y (the prediction weights)
    this->alpha = this->Kyinv*y_arg;

//    this->data_addded = true;
}


// note: see p. 118 in "Gaussian Processes for Machine Learning"; the marginal likelihood tells us the probability of the observations given the assumptions of the model, whereas the LOO-CV value gives an estimate of the (log) predictive probability, whether or not the assumptions of the model may be fulfilled.

/*
//========================================================================
//========================================================================
//
// NAME: double GaussianProcess::likelihood()
//
// DESC: Compute the marginal log likelihood.
//
//========================================================================
//========================================================================
inline double Gaussian_process::GaussianProcess::likelihood()
{
    // ***
    // << jmm: prevents |Ky| from reaching <= 0 >>
    const double min_detK = 1.0e-10;

    //=========================================================
    // ***

    // compute the determinant of K
    int info;
    double detK;
    std::tie(info, detK) = determinant(this->Ky);

    if(info != 0)
    {
        throw std::runtime_error("error in GaussianProcess::likelihood(): cannot compute the determinant of K");
    }

    // ***
    detK = std::max(detK, min_detK);
    // ***

    return (0.5*(-dot_product(this->y, this->alpha) - std::log(detK) - static_cast<double>(this->X.size(0))*std::log(2.0*PI)));
}


//========================================================================
//========================================================================
//
// NAME: std::vector<double> GaussianProcess::dlikelihood()
//
// DESC:
//
//========================================================================
//========================================================================
inline std::vector<double> Gaussian_process::GaussianProcess::dlikelihood()
{
    // the total number of hyperparameters is the number of those in the covariance function + 1 noise term
    int nhyperparameters = k->nhyperparameters() + 1;

    std::vector<double> dLdtheta(nhyperparameters, 0.0);

    std::vector<Matrix<double>> dKdtheta(nhyperparameters, Matrix<double>(this->X.size(0), this->X.size(0)));

    for( auto i = 0; i < this->X.size(0); ++i )
    {
        for( auto j = 0; j < this->X.size(0); ++j )
        {
            // add the covariance function hyperparameters
            std::vector<double> dKdthetaij = this->k->ddtheta(this->X.row(i), this->X.row(j));

            for(auto l = 0; l < k->nhyperparameters(); ++l)
            {
                dKdtheta[l](i,j) = dKdthetaij[l];
            }

            // add the noise hyperparameter
            if(j != i)
            {
                dKdtheta[(nhyperparameters-1)](i,j) = 0.0;
            }
            else
            {
                dKdtheta[(nhyperparameters-1)](i,j) = 2.0*this->sigma_n;
            }
        }
    }

    Matrix<double> alpha_mat(this->X.size(0), 1, this->alpha.std_vector(), 'r');
    Matrix<double> alphaT_mat(1, this->X.size(0), this->alpha.std_vector(), 'c');

    Matrix<double> alpha_alphaT = alpha_mat*alphaT_mat;

    for(auto l = 0; l < nhyperparameters; ++l)
    {
        for( unsigned int i = 0; i < this->X.size(0); ++i )
        {
            for( unsigned int m = 0; m < dKdtheta[l].size(0); ++m )
            {
                dLdtheta[l] += (alpha_alphaT(i,m) - this->Kyinv(i,m))*dKdtheta[l](m,i);
            }
        }

        dLdtheta[l] *= 0.5;
    }

    return dLdtheta;
}
*/

// %%%%%
// %%%%%

// BELOW IS THE LOO-CV LIKELIHOOD, ETC.

// Leave-one-out cross-validation (LOO-CV) log predictive probability,
// GPML Eqs. (5.10)-(5.12): for each training point i,
//   mu_i      = y_i - [Ky^-1 y]_i / [Ky^-1]_ii
//   sigma_i^2 = 1 / [Ky^-1]_ii
//   log p_i   = -log(sigma_i^2)/2 - (y_i - mu_i)^2 / (2 sigma_i^2) - log(2 pi)/2
// and the returned value is the sum of the log p_i.
inline double Gaussian_process::GaussianProcess::likelihood()
{
    double loocv = 0.0;

    for( auto i = 0; i < this->X.size(0); ++i )
    {
        const double kinv_ii = this->Kyinv(i,i);

        // Eq. (5.12): LOO predictive mean and variance at point i
        const double mean_i = this->y(i) - this->alpha(i)/kinv_ii;
        const double var_i  = 1.0/kinv_ii;

        const double resid = this->y(i) - mean_i;

        loocv += -0.5*std::log(var_i) - resid*resid/(2.0*var_i) - 0.5*std::log(2.0*PI);
    }

    return loocv;
}

inline std::vector<double> Gaussian_process::GaussianProcess::dlikelihood()
{
    // the total number of hyperparameters is the number of those in the covariance function + 1 noise term
    int nhyperparameters = k->nhyperparameters() + 1;

    std::vector<double> dLdtheta(nhyperparameters, 0.0);

    std::vector<Matrix<double>> dKdtheta(nhyperparameters, Matrix<double>(this->X.size(0), this->X.size(0)));

    for( auto i = 0; i < this->X.size(0); ++i )
    {
        for( auto j = 0; j < this->X.size(0); ++j )
        {
            // add the covariance function hyperparameters
            std::vector<double> dKdthetaij = this->k->ddtheta(this->X.row(i), this->X.row(j));

            for(auto l = 0; l < k->nhyperparameters(); ++l)
            {
                dKdtheta[l](i,j) = dKdthetaij[l];
            }

            // add the noise hyperparameter
            if(j != i)
            {
                dKdtheta[(nhyperparameters-1)](i,j) = 0.0;
            }
            else
            {
                dKdtheta[(nhyperparameters-1)](i,j) = 2.0*this->sigma_n;
            }
        }
    }

    std::vector<Matrix<double>> Z(nhyperparameters);

    for(auto j = 0; j < nhyperparameters; ++j)
    {
        Z[j] = this->Kyinv*dKdtheta[j];
    }

    std::vector<Matrix<double>> Z_Kinv(nhyperparameters);

    for(auto j = 0; j < nhyperparameters; ++j)
    {
        Z_Kinv[j] = Z[j]*this->Kyinv;
    }

    std::vector<Vector<double>> Z_alpha(nhyperparameters);

    for(auto j = 0; j < nhyperparameters; ++j)
    {
        Z_alpha[j] = Z[j]*this->alpha;
    }

    for(auto j = 0; j < nhyperparameters; ++j)
    {
        for( auto i = 0; i < this->X.size(0); ++i )
        {
            dLdtheta[j] += ((this->alpha(i)*Z_alpha[j](i) - 0.5*(1.0 + this->alpha(i)*this->alpha(i)/this->Kyinv(i,i))*Z_Kinv[j](i,i))/this->Kyinv(i,i));
        }
    }

    return dLdtheta;
}

// %%%%%
// %%%%%

//========================================================================
//========================================================================
//
// NAME: double GaussianProcess::predict(const Vector<double> &x) const
//
// DESC: (de-)Normalizes data to lie in the range [0,1].
//
//========================================================================
//========================================================================
inline double Gaussian_process::GaussianProcess::predict(const Vector<double> &x) const
{
    Vector<double> Kstar(this->X.size(0));

    for( auto i = 0; i < this->X.size(0); ++i )
    {
        Kstar(i) = this->k->cov(x, this->X.row(i));
    }

    return dot_product(Kstar, this->alpha);
}


//========================================================================
//========================================================================
//
// NAME: Vector<double> GaussianProcess::predict_deriv(const Vector<double> &x)
//
// DESC: Gradient of the predictive mean with respect to the test point x:
//       (dk(x,X)/dx) * alpha, one entry per input dimension.
//
//========================================================================
//========================================================================
inline Vector<double> Gaussian_process::GaussianProcess::predict_deriv(const Vector<double> &x)
{
    // rows: input dimensions; columns: training points
    Matrix<double> dkstar(this->X.size(1), this->X.size(0));

    for( auto col = 0; col < this->X.size(0); ++col )
    {
        const Vector<double> dcov = this->k->ddx(x, this->X.row(col));

        for( auto row = 0; row < dcov.size(); ++row )
        {
            dkstar(row,col) = dcov(row);
        }
    }

    return ( dkstar*(this->alpha) );
}
